# from Coronavirus dashboard
# https://coronavirus.data.gov.uk/healthcare
admissions <- read_csv(str_c(.rawDir, "admissions_data_2020-Nov-09.csv"))
deaths <- read_csv(str_c(.rawDir,"deaths_data_2020-Nov-09.csv"))
admi <- admissions %>% 
  filter(areaName == "England") %>% 
  select(date, newAdmissions) %>% 
  rename(admi = newAdmissions)

dths <- deaths %>% 
  filter(areaName == "England") %>% 
  select(date, newDeaths28DaysByDeathDate) %>% 
  rename(dths = newDeaths28DaysByDeathDate)

dailyDat <- admi %>% 
  left_join(dths, by = "date") %>% 
  mutate(isoWk = isoweek(date))

dailyMovAvgDat <- dailyDat %>% 
  mutate(across(c(admi, dths), ~ zoo::rollmean(., 7, na.pad = TRUE, align = "center")))

Daily mortality and hospital admissions numbers for England.

dailyDat %>% 
  ggplot(aes(x = date)) +
  geom_line(aes(y = admi), color = "#440154FF", size = 1) +
  geom_line(aes(y = dths), color = "#440154FF", size = 1) +
  scale_x_date(name = NULL) +
  scale_y_continuous(name = NULL) +
  labs(
    title = "Daily hospitalisations and deaths in England"
    , caption = "Source: UK government Coronavirus dashboard, coronavirus.data.gov.uk")

Calculate 7-day centred daily average to remove some of the noise.

dailyMovAvgDat %>% 
  ggplot(aes(x = date)) +
  geom_point(aes(x = date, y = admi), color = "#D3D3D3FF", data = dailyDat) +
  geom_point(aes(x = date, y = dths), color = "#D3D3D3FF", data = dailyDat) +
  geom_line(aes(y = admi), color = "#440154FF", size = 1) +
  geom_line(aes(y = dths), color = "#440154FF", size = 1) +
  scale_x_date(name = NULL) +
  scale_y_continuous(name = NULL) +
  labs(
    title = "Daily hospitalisations and deaths in England"
    , subtitle = "7-day centred moving average"
    , caption = "Source: UK government Coronavirus dashboard, coronavirus.data.gov.uk")

A different perspective for viewing the link between hospitalisations and deaths: Move the number of hospital admissions to the x-axis, the number of Covid-19 deaths to the y-axis, and connect the dots according to time.

tmp_date <- dailyDat %>% sample_frac(0.1)

dailyDat %>% 
  ggplot(aes(x = admi, y = dths, label = as.character(date))) +
  geom_point(color = "#D3D3D3FF") +
  geom_text_repel(data = tmp_date) +
  geom_segment(aes(xend = c(tail(admi, n = -1), NA) , yend = c(tail(dths, n = -1), NA)), color = "#808080FF") +
  scale_x_continuous(name = "hospitalisations") +
  scale_y_continuous(name = "deaths") +
  labs(
    title = "Daily hospitalisations and deaths in England"
    , caption = "Source: UK government Coronavirus dashboard, coronavirus.data.gov.uk") +
  theme(
    panel.grid.major.x = element_line(size = .2, color = "#D3D3D3FF")
  )

Not very insightful! We need to account for the exponential character of the spread of the virus - logarithmic scales should help.

tmp_date <- dailyDat %>% sample_frac(0.1)

dailyDat %>% 
  ggplot(aes(x = admi, y = dths, label = as.character(date))) +
  geom_point(color = "#D3D3D3FF") +
  geom_text_repel(data = tmp_date) +
  geom_segment(aes(xend = c(tail(admi, n = -1), NA) , yend = c(tail(dths, n = -1), NA)), color = "#808080FF") +
  scale_x_log10(name = "hospitalisations (log scale)") +
  scale_y_log10(name = "deaths (log scale)") +
  labs(
    title = "Daily hospitalisations and deaths in England"
    , caption = "Source: UK government Coronavirus dashboard, coronavirus.data.gov.uk") +
  theme(
    panel.grid.major.x = element_line(size = .2, color = "#D3D3D3FF")
  )

Still quite noisy - let’s use the 7-day average to try and get a clearer picture of the trends.

dailyDat %>% 
  ggplot() +
  geom_point(aes(x = admi, y = dths), color = "#DCDCDCFF") +
  geom_segment(aes(x = admi, y = dths, xend = c(tail(admi, n = -1), NA) , yend = c(tail(dths, n = -1), NA)), color = "#DCDCDCFF") +
  geom_point(aes(x = admi, y = dths), color = "#440154FF", data = dailyMovAvgDat) +
  geom_segment(aes(x = admi, y = dths, xend = c(tail(admi, n = -1), NA) , yend = c(tail(dths, n = -1), NA))
               , color = "#440154FF", data = dailyMovAvgDat) +
  scale_x_log10(name = "hospitalisations (log scale)") +
  scale_y_log10(name = "deaths (log scale)") +
  labs(
    title = "Daily hospital admissions and deaths in England"
      , subtitle = "7-day centred moving average"
      , caption = "Source: UK government Coronavirus dashboard, coronavirus.data.gov.uk") +
  theme(
    panel.grid.major.x = element_line(size = .2, color = "#D3D3D3FF")
  )

Let’s help the reader by labelling some key dates.

startDt <- dailyMovAvgDat %>% filter(!is.na(admi)) %>% filter(date == first(date)) %>% pull(date)
endDt <- dailyMovAvgDat %>% filter(!is.na(admi)) %>% filter(date == last(date)) %>% pull(date)

keyDates <- dailyMovAvgDat %>%
  filter(day(date) == 1 | date %in% c(startDt, endDt)) %>%
  mutate(aboveorbelow = case_when(
    date %in% seq(ymd("2020-05-01"), ymd("2020-08-01"), by = "months") ~ "above", TRUE ~ "below"))

dailyDat %>% 
  ggplot() +
  geom_point(aes(x = admi, y = dths), color = "#DCDCDCFF") +
  geom_segment(aes(x = admi, y = dths, xend = c(tail(admi, n = -1), NA) , yend = c(tail(dths, n = -1), NA)), color = "#DCDCDCFF") +
  geom_point(aes(x = admi, y = dths), color = "#440154FF", data = dailyMovAvgDat) +
  geom_segment(aes(x = admi, y = dths, xend = c(tail(admi, n = -1), NA) , yend = c(tail(dths, n = -1), NA))
               , color = "#440154FF", data = dailyMovAvgDat) +
  geom_text_repel(aes(x = admi, y = dths, label = as.character(format(date, "%d-%b")))
                  , nudge_x = ifelse(keyDates$aboveorbelow == "above", -.2, .2), nudge_y = ifelse(keyDates$aboveorbelow == "above", .2, -.2)
                  , color = "#2C2825FF"
                  , segment.size = .2, segment.color = "#D3D3D3FF"
                  , data = keyDates) +
  scale_x_log10(name = "hospitalisations (log scale)") +
  scale_y_log10(name = "deaths (log scale)") +
  labs(
    title = "Daily hospital admissions and deaths in England"
    , subtitle = "7-day centred moving average"
    , caption = "Source: UK government Coronavirus dashboard, coronavirus.data.gov.uk") +
  theme(
    panel.grid.major.x = element_line(size = .2, color = "#D3D3D3FF")
  )

And animate!

# plot only up to 7 days prior to the most recent data point to account for lag in death registrations
dailyMovAvgDat <- dailyMovAvgDat %>% 
  filter(!is.na(admi), date > as.Date("2020-03-22"), date < (max(date) - days(7))) %>% 
  arrange(date)

startDt <- first(dailyMovAvgDat$date)
endDt <- last(dailyMovAvgDat$date)

keyDates <- dailyMovAvgDat %>%
    filter(day(date) == 1 | date %in% c(startDt, endDt, as.Date("2020-05-10"))) %>%
    mutate(keyDate = case_when(
      day(date) == 1 ~ date, date == endDt ~ date, TRUE ~ NA_Date_)) %>% 
    mutate(keyDate = case_when(
      keyDate == endDt ~ as.character(format(keyDate, "%d-%b")), day(keyDate) == 1 ~ as.character(format(keyDate, "%d-%b")), TRUE ~ as.character(keyDate))) %>% 
    mutate(keyDate = case_when(
      date == "2020-05-10" ~ "10-May, lockdown easing begins", date == "2020-03-23" ~ "23-Mar, lockdown starts", TRUE ~ keyDate)) %>% 
    mutate(aboveorbelow = case_when(
      date %in% c(seq(ymd("2020-05-01"), ymd("2020-08-01"), by = "months"), "2020-05-10") ~ "above", TRUE ~ "below"))

dailyMovAvgDat <- dailyMovAvgDat %>%  
  left_join(keyDates, by = c("date", "admi", "dths", "isoWk"))

labelVar <- tibble(
    date = dailyMovAvgDat$date
    , x = 50, y = 500)

p <- ggplot() +
    # layer 1
    geom_segment(aes(x = admi, y = dths, xend = c(tail(admi, n = -1), NA) , yend = c(tail(dths, n = -1), NA))
                 , color = "#453781FF", size = .8
                 , data = dailyMovAvgDat) +
    # layer 2
    geom_point(aes(x = admi, y = dths)
               , fill = "#FFFFFFFF"
               , color = "#453781FF"
               , size = 2.5
               , shape = 21
               , data = dailyMovAvgDat) +
    # layer 3
    geom_text(aes(x = x, y = y, label = format(date, "%d-%b"))
              , family = "Fira Sans Medium", size = 8, color = "#686F73FF"
              , data = labelVar) +
    # layer 4 
    geom_text_repel(aes(x = admi, y = dths, label = keyDate)
                    , nudge_x = ifelse(keyDates$aboveorbelow == "above", -.2, .2), nudge_y = ifelse(keyDates$aboveorbelow == "above", .2, -.2)
                    , color = "#2C2825FF", segment.size = .2, segment.color = "#D3D3D3FF", hjust = ifelse(dailyMovAvgDat$aboveorbelow == "above", 1, 0)
                    , seed = 123, point.padding = .25, max.iter = 500
                    , data = dailyMovAvgDat) +
    annotate(geom = "text", x = 10.5, y = 1600
             , label = "Deaths (log scale)", color = "#686F73FF", hjust = 0, vjust = 1, family = "Fira Sans Medium", size = 4) +
    annotate(geom = "text", x = 4900, y = 4.4
             , label = "Hospitalisations (log scale)", color = "#686F73FF", hjust = 1, vjust = 0, family = "Fira Sans Medium", size = 4) +
    scale_x_log10(name = NULL, limits = c(10, 5000), expand = c(0, 0.05), breaks = c(10, 100, 1000, 5000)) +
    scale_y_log10(name = NULL, limits = c(4, 1600), expand = c(0, 0.05), breaks = c(10, 100, 1000)) +
    labs(
      title = "Coming full circle: daily hospitalisations and deaths in England"
      , subtitle = "7-day centred moving average hospitalisations & deaths"
      , caption = "Strategy Unit graphic\nSource: UK government Coronavirus dashboard, coronavirus.data.gov.uk") +
    theme(
      panel.grid.major.x = element_line(size = .2, color = "#D3D3D3FF")
      ) +
    transition_time(date) +
    ease_aes("linear") +
    shadow_mark(past = TRUE, future = FALSE, exclude_layer = 3)

animate(plot = p, fps = 10, duration = 40, end_pause = 50
        , width = 256, height = 160, res = 180, units = "mm"
        , renderer = gifski_renderer())

anim_save(paste0(.figDir, "covid-connected-scatterplot.gif"))